*===============================================================================
* Merge data on initial conditions and modify codes
*===============================================================================

* Start from an empty session and disable output paging so the do-file runs
* unattended.
clear all
set more off

* Work from the data directory held in the global macro dta_files (expected to
* be defined by the calling do-file). The path is fully quoted so that
* directories containing spaces are handled.
cd "$dta_files"

*===============================================================================
* 1) Merge EYB Data and modify codes
*===============================================================================
* The industry file is the master; age, female employment and GDP files are
* joined to it one-to-one on the region code.
use IC_EU_IND.dta
foreach eyb_file in IC_EU_AGE IC_EU_FEMP.dta IC_EU_GDP {
	merge 1:1 nuts using `eyb_file', nogen
}

*-------------------------------------------------------------------------------
* Rename codes (composite codes that are considered
* to be rough proxies for NUTS2 codes in later periods)
*-------------------------------------------------------------------------------
replace nuts = "DE73"      if nuts == "DE724&DE73"
replace nuts = "DE71&DE72" if nuts == "DE71&DE721&DE722&DE723&DE725"
replace nuts = "DE91" if inlist(nuts, ///
	"DE911&DE912&DE91B&DE917&DE91A&DE916", ///
	"DE911&DE912&DE91B&DE917", ///
	"DE91A&DE925&DE926&DE918&DE916&DE919&DE915")

replace nuts = "DE93" if inlist(nuts, ///
	"DE931&DE93A&DE934&DE935&DE933&DE938&DE914&DE913", ///
	"DE932&DE939&DE937&DE93B&DE936")
replace nuts = "DE94" if inlist(nuts, ///
	"DE944&DE94E&DE949&DE94B", ///
	"DE94C&DE947&DE942&DE94H", ///
	"DE94A&DE945&DE94G&DE946&DE943&DE94D&DE941&DE948&DE94F")
replace nuts = "DEA2" if inlist(nuts, ///
	"DEA22&DEA23&DEA24&DEA27&DEA2A&DEA2B&DEA2C", ///
	"DEA2D&DEA29&DEA26&DEA28")

* Several rows can now share one code. Keep the recoded rows on disk so they
* can be collapsed twice: once unweighted, once weighted.
tempfile recoded_rows unweighted_means
save `recoded_rows'

* Pass 1: unweighted mean of every listed variable within each code.
collapse (mean) age_* EMP_female ind_* serv_* gdp*, by(nuts)
save `unweighted_means'

* Pass 2: mean of GDP per capita and the female employment share, weighted by
* age_total; rows with a missing or zero age_total get a weight of 1.
use `recoded_rows', clear
tempvar popweight
gen `popweight' = age_total
replace `popweight' = 1 if inlist(`popweight', ., 0)

collapse (mean) gdp_per_capita EMP_share_female [aw=`popweight'], by(nuts)

* The weighted result is the master, so for variables present in both files
* the weighted values are the ones kept (merge is run without update).
merge 1:1 nuts using `unweighted_means', nogen

*===============================================================================
* 2) Add in UK Census data
* UK industry data for 1981 is more detailed, including finance, public
* administration and energy. Use the detail from 1981 while keeping the
* industry shares at their 1971 levels. Note that Scotland and Ireland are not
* in the Special Workplace Statistics; their values from EYB are retained.
*===============================================================================

* Country prefix = first two characters of the region code.
gen country=substr(nuts,1,2)

* Blank the EYB industry and services variables for UK regions, except the
* codes UKM, UKN and UKN0, so that the census merge below can fill them in
* (merge with update only overwrites missing master values).
foreach var of varlist ind* serv* {
	replace `var' = . if country=="UK" & nuts!="UKM" & nuts!="UKN" ///
			& nuts!="UKN0"
}

* Treat a zero age total as missing so that it can also be updated.
replace age_total = . if age_total==0
merge 1:1 nuts using "$dta_files/IC_UK_CENS1971.dta", ///
		update gen(_merge_uk_census_1971)

* Three-character prefix for UK rows; UKM and UKN mark the regions that are
* handled separately below.
gen scotland_ireland = substr(nuts,1,3) if country=="UK"

* Keep a copy of the post-census (1971) values, then blank the working
* variables again wherever the census merge touched the row (codes 4 and 5 of
* the merge indicator: missing updated / nonmissing conflict), so that the
* 1981 merge can fill them. Rows under UKM and UKN are left as they are.
* The variable is referenced by its full name rather than an abbreviation so
* the code does not depend on the varabbrev setting.
foreach var of varlist ind* serv* {
	gen `var'_1971 = `var'
	replace `var' = . if (_merge_uk_census_1971==4 | ///
	_merge_uk_census_1971==5) & scotland_ireland!="UKM" ///
	& scotland_ireland!="UKN"
}

merge 1:1 nuts using "$dta_files/IC_UK_SWS1981.dta", update 

* Rescale the 1981 detail from the Special Workplace Statistics so that it
* reflects the 1971 shares. Applies to UK rows outside UKM and UKN only.
*
* The 1981 sub-sector total is stored in a temporary variable BEFORE any
* component is overwritten. Recomputing the sum inside each replace would mix
* already-rescaled and not-yet-rescaled components in the denominator, and
* the four sub-sectors would no longer add up to the 1971 target.
tempvar uk_sws ind_sum_1981
gen byte `uk_sws' = country=="UK" & scotland_ireland!="UKM" ///
			& scotland_ireland!="UKN"
gen double `ind_sum_1981' = ind_metals + ind_mining + ind_energy + ind_other

* Agriculture and construction are taken directly from the 1971 values.
replace ind_agro	 = ind_agro_1971 if `uk_sws'
replace ind_construction = ind_construction_1971 if `uk_sws'

* Each sub-sector keeps its 1981 share of the four-sector total, applied to
* the 1971 manufacturing plus mining level.
foreach sub in metals mining energy other {
	replace ind_`sub' = ind_`sub'/`ind_sum_1981' * ///
			(ind_mnfg_1971 + ind_mining_1971) if `uk_sws'
}

* Rebuild the aggregates from the rescaled components.
replace ind_total 	= ind_construction + ind_metals + ind_mining + ///
			ind_energy + ind_other if `uk_sws'
replace ind_mnfg 	= ind_metals + ind_other if `uk_sws'

* Services: each component keeps its 1981 share of serv_total, applied to the
* 1971 services total. serv_total itself is only rebuilt after the loop, so
* the denominator is the same for every component.
foreach sub in commerce transport admin credit other {
	replace serv_`sub' = serv_`sub'/serv_total * serv_total_1971 ///
			if `uk_sws'
}
replace serv_total 	= serv_commerce + serv_transport + serv_admin + ///
			serv_credit + serv_other if `uk_sws'

* Remove the helper variables so the dataset layout is unchanged.
drop `uk_sws' `ind_sum_1981'

* Use the Scotland industry breakdown from Eurostat (EYB).
* For every variable in the range ind_agro to serv_total, take the value on
* the UKM row and copy it to all rows whose three-character prefix is UKM.
foreach var of varlist ind_agro-serv_total {
	gen `var'_eyb = `var' if nuts=="UKM" 
	egen `var'_m = min(`var'_eyb) if scotland_ireland=="UKM"
	drop `var'_eyb
	rename `var'_m `var'_eyb
}

* The level to be distributed (manufacturing plus mining) is stored BEFORE
* ind_mining is overwritten. Recomputing it inside each replace would make
* energy and other scale to a total that already contains the new mining value.
tempvar scot ind_target
gen byte `scot' = scotland_ireland=="UKM"
gen double `ind_target' = ind_mnfg + ind_mining

* Split the target across the four sub-sectors using the EYB shares.
foreach sub in metals mining energy other {
	replace ind_`sub' = ind_`sub'_eyb/(ind_metals_eyb + ind_mining_eyb + ///
			ind_energy_eyb + ind_other_eyb) * `ind_target' if `scot'
}

* Rebuild the aggregates from the new components.
replace ind_total 	= ind_construction + ind_metals + ind_mining + ///
			ind_energy + ind_other if `scot'
replace ind_mnfg 	= ind_metals + ind_other if `scot'

* Services: EYB component shares applied to the row's own services total.
* serv_total is only rebuilt after the loop, so every component uses the same
* total.
foreach sub in commerce transport admin credit other {
	replace serv_`sub' = serv_`sub'_eyb/serv_total_eyb * serv_total ///
			if `scot'
}
replace serv_total 	= serv_commerce + serv_transport + serv_admin + ///
			serv_credit + serv_other if `scot'

* Drop the 1971 copies, the EYB helpers, the region prefix and the temporary
* helpers. The pattern ending in _1971 also removes _merge_uk_census_1971.
drop *_1971 *_eyb scotland_ireland `scot' `ind_target'

*===============================================================================
* 3) Merge all other data
*===============================================================================

* Files merged with the update option: values already in memory are kept and
* only missing values are filled from the incoming file. The order of the
* list is the order of the merges.
local update_sources ///
	IC_EU_education.dta IC_EU_hh_own.dta IC_EU_migration.dta ///
	IC_EU_IPUMS.dta ///
	IC_UK_education.dta ///
	IC_US_CENS1970.dta IC_US_IND_GDP.dta ///
	IC_CA_GDP_IND.dta IC_CA_migration.dta IC_CA_CENS1971.dta ///
	IC_CA_home_own.dta IC_CA_hh_size.dta

foreach src of local update_sources {
	merge 1:1 nuts using `src', nogen update
}

* Urbanization is merged without the update option.
merge 1:1 nuts using IC_urbanization.dta, nogen

* A true zero is nudged to a small positive number so that the later
* zero-to-missing clean-up steps do not turn it into ".".
foreach urb of varlist urban* {
	replace `urb' = .0001 if `urb'==0
}

*-------------------------------------------------------------------------------
* Generate composite nuts
*-------------------------------------------------------------------------------
* Build composite regions out of their component regions and add them to the
* full dataset.
*
* Tempfiles are referenced by the exact macro name that the tempfile command
* creates (append, precollapse_3, postcollapse_2). A reference with a .dta
* suffix inside the macro quotes names a different macro than the one
* tempfile defined.

* Snapshot of the full dataset, re-appended at the end of this section.
tempfile append
save 	`append'

* Tag the rows that belong to a composite region and keep only those.
gen composite_nuts=""
replace composite_nuts="FR82&FR83" if nuts=="FR82"|nuts=="FR83"
replace composite_nuts="NL21&NL23" if nuts=="NL21"|nuts=="NL23"
replace composite_nuts="ITH1&ITH2" if nuts=="ITH1"|nuts=="ITH2"
replace composite_nuts="DE91&DE92" if nuts=="DE925&DE926&DE918&DE919&DE915"| ///
	nuts=="DE922&DE923&DE927&DE928&DE929"|nuts=="DE91"|nuts=="DE92"
replace composite_nuts="DE71&DE72" if nuts=="DE71"|nuts=="DE72"
drop if composite_nuts==""

tempfile precollapse_3
save `precollapse_3'

* Pass 1: the listed variables are summed over the components
* (migration_yrs takes the first non-missing value).
collapse (sum) age_* EMP_female edatt* gdp hh home* ind_* serv_*  inmig outmig ///
	 urban* total* migration_pop (firstnm) migration_yrs ///
	, by(composite_nuts) 

tempfile postcollapse_2
save `postcollapse_2'

* Pass 2: the three listed variables are averaged, weighted by age_total
* (weight 1 where age_total is missing or zero).
use `precollapse_3', clear

* Gen weights for mean collapse
gen wgt = age_total
	replace wgt = 1 if wgt==.|wgt==0

* NOTE(review): year appears in by() here but not in the sum collapse above.
* The 1:1 merge below needs composite_nuts to be unique, so year is presumably
* constant within each composite region. Confirm.
collapse (mean) EMP_share gdp_per_capita hh_size [aw=wgt], by(composite_nuts year)

* Every composite must be present in both passes; assert(3) stops otherwise.
merge 1:1 composite_nuts using `postcollapse_2', assert(3) nogen

* replace zeros from previous "collapses" with "."
foreach v of var age_* EMP* edatt* gdp* hh home* ind_* serv_* inmig outmig ///
		 urban* total* migration_pop {
replace `v'=. if `v'==0
}

* Put the composites back with the full dataset and reduce to one row per
* code; where a code occurs more than once the rows are averaged.
rename composite_nuts nuts
append using `append'

collapse (mean) age_* EMP* edatt* gdp* hh home* ind_* serv_*  inmig outmig ///
		urban* total* migration_pop  (firstnm) migration_yrs ///
	, by(nuts)

*-------------------------------------------------------------------------------
*  Merge data for codes that are the same for nuts2 and nuts3 (eg DE3 and DE30)
*-------------------------------------------------------------------------------
* Snapshot before recoding, re-appended below so the original codes are kept.
* The tempfile is referenced by the exact macro name that the tempfile command
* creates (precollapse_2), without a .dta suffix inside the macro quotes.
tempfile precollapse_2
save 	`precollapse_2'

* Map each three-character code to the four-character code with a trailing 0,
* so that rows for the same region end up under one code.
replace nuts="BE10" if nuts=="BE1"
replace nuts="DE30" if nuts=="DE3" 
replace nuts="DE40" if nuts=="DE4"
replace nuts="DE50" if nuts=="DE5"
replace nuts="DE60" if nuts=="DE6"
replace nuts="DE80" if nuts=="DE8"
replace nuts="DEC0" if nuts=="DEC"
replace nuts="DEF0" if nuts=="DEF"
replace nuts="DEE0" if nuts=="DEE"
replace nuts="ES30" if nuts=="ES3"
replace nuts="ES70" if nuts=="ES7"
replace nuts="FR10" if nuts=="FR1"
replace nuts="FR30" if nuts=="FR3"
replace nuts="UKN0" if nuts=="UKN"

* One row per code: rows that now share a code are averaged.
collapse (mean) age_* EMP* edatt* gdp* hh home* ind_* serv_*  inmig outmig ///
	 urban* total* migration_pop (firstnm) migration_yrs ///
	, by(nuts) 
	
* replace zeros from previous "collapses" with "."
foreach v of var age_* EMP* edatt* gdp* hh home* ind_* serv_* migration_pop {
replace `v'=. if `v'==0
}

* Bring back the pre-recode rows (including the three-character codes) and
* reduce to one row per code again.
append using `precollapse_2'

collapse (mean) age_* EMP* edatt* gdp* hh home* ind_* serv_*  inmig outmig ///
	 urban* total* migration_pop  (firstnm) migration_yrs ///
	, by(nuts) 
	
* replace again zeros with "."
foreach v of var age_* EMP* edatt* gdp* hh home* ind_* serv_* inmig outmig ///
		 urban* total* migration_pop {
replace `v'=. if `v'==0
}
*-------------------------------------------------------------------------------
* Flag the NUTS level of every row, drop rows without a level, save the result
*-------------------------------------------------------------------------------
gen country = substr(nuts,1,2)

* Level from code length: 2 characters = level 0, 3 = level 1, 4 = level 2.
* Any other length is left missing at this point.
gen nuts_level = length(nuts) - 2 if inrange(length(nuts), 2, 4)

* Composite regions (and DEF) count as NUTS 2.
replace nuts_level = 2 if inlist(nuts, "DE71&DE72", "DE91&DE92", "DEF", ///
			"FR82&FR83", "ITH1&ITH2", "NL21&NL23")

* US and Canadian rows: level 2 by default, level 1 for US codes longer than
* six characters, level 0 for the two national rows.
replace nuts_level = 2 if inlist(country, "US", "CA")
replace nuts_level = 1 if country=="US" & length(nuts)>6
replace nuts_level = 0 if inlist(nuts, "Canada", "Entire U.S.")

* Rows that received no level are removed.
drop if missing(nuts_level)

********************************************************************************

save "step200_all_initial_conditions.dta", replace


